This project explores the development of neural network models to classify bird sounds from the Seattle area, using spectrograms generated from audio recordings. It covers a binary model, a multi-class model, and an evaluation on external test data.
Libraries
In [1]:
# Load necessary libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Compatibility shim: re-create the `np.complex` alias before librosa is
# imported. NOTE(review): presumably needed because the installed librosa
# still references `np.complex`, which newer NumPy releases removed — confirm,
# and drop this line once librosa is upgraded.
np.complex = complex
import librosa
import librosa.display
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, ConfusionMatrixDisplay, mean_absolute_error
# NOTE(review): both imports below bind the same name `resize`. The skimage
# import runs last, so `resize` refers to skimage.transform.resize and the
# tensorflow.image version is not reachable under that name.
from tensorflow.image import resize
from skimage.transform import resize
import h5py
Load dataset
We start by loading the preprocessed bird-call spectrogram dataset. The dataset contains spectrograms for 12 bird species, which represent the frequency patterns of each species' vocalizations.
In [3]:
# Open the spectrogram archive read-only and summarise its contents.
with h5py.File('bird_spectrograms.hdf5', 'r') as file:
    # Iterating an HDF5 file/group yields its top-level member names,
    # which here are the species codes.
    species_names = list(file)
    print("Available Species:", species_names)

    # Report the shape of each species' dataset, one line per species.
    for bird in species_names:
        print(f"{bird} shape: {file[bird].shape}")
Available Species: ['amecro', 'amerob', 'bewwre', 'bkcchi', 'daejun', 'houfin', 'houspa', 'norfli', 'rewbla', 'sonspa', 'spotow', 'whcspa']
amecro shape: (128, 517, 66)
amerob shape: (128, 517, 172)
bewwre shape: (128, 517, 144)
bkcchi shape: (128, 517, 45)
daejun shape: (128, 517, 125)
houfin shape: (128, 517, 84)
houspa shape: (128, 517, 630)
norfli shape: (128, 517, 37)
rewbla shape: (128, 517, 187)
sonspa shape: (128, 517, 263)
spotow shape: (128, 517, 137)
whcspa shape: (128, 517, 91)